import requests
from bs4 import BeautifulSoup
import re
# HTTP headers sent with every request this script makes (passed as
# `headers=headers` to `requests.get`). The values mimic a desktop Chrome
# browser navigating from the Douban "explore" page.
headers = {
    'Accept': '*/*',
    # NOTE(review): hard-coded cookie string, apparently copied from a browser
    # session. It embeds fixed identifiers and timestamps, so it will
    # presumably go stale, and it should not live in source control — confirm
    # whether the endpoint actually needs it and consider loading it from
    # configuration instead.
    'Cookie': 'll="118282"; bid=y3lYf7DmYfo; __utmc=30149280; __utmc=223695111; _vwo_uuid_v2=DBC5532C3A7D8D3BEC60FBCCB1CFA4B6B|8ab97b2cf1a730411fecef0f35f45cbd; _pk_ref.100001.4cf6=%5B%22%22%2C%22%22%2C1652770813%2C%22https%3A%2F%2Fwww.douban.com%2F%22%5D; _pk_ses.100001.4cf6=*; __utma=30149280.387804381.1652683914.1652757224.1652770813.4; __utmb=30149280.0.10.1652770813; __utmz=30149280.1652770813.4.4.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; __utma=223695111.1857267429.1652683965.1652683965.1652770813.2; __utmb=223695111.0.10.1652770813; __utmz=223695111.1652770813.2.2.utmcsr=douban.com|utmccn=(referral)|utmcmd=referral|utmcct=/; ap_v=0,6.0; __gads=ID=3e832771287cb726-22e8f24e33d3007c:T=1652770847:RT=1652770847:S=ALNI_MbODQUf5WnJBGIseVHAYx7b5wsAKg; __gpi=UID=0000056d2866078b:T=1652770847:RT=1652770847:S=ALNI_MYxEeMiTCsJtLOKblnNn2m_Paplzg; _pk_id.100001.4cf6=af4c7123eecc0a05.1652683965.2.1652771403.1652684354.',
    # Referer and User-Agent make the request look like in-site browser traffic.
    'Referer': 'https://movie.douban.com/explore',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.67 Safari/537.36',
    }
# Number of result pages to fetch. Each request asks for page_limit=20, so
# entering 3 fetches up to 60 movie entries.
# NOTE(review): the prompt says the number must not exceed 20, but nothing
# enforces that limit, and non-numeric input raises ValueError.
TT = int(input('请输入要爬取的数量(数字不可超过20)：'))
R = 0  # running total of movie entries printed
F = 0  # page_start offset for the next request; advances by 20 per page

# Field extractors, compiled once instead of being re-run for every movie.
# They match JSON-style `"key":"value"` pairs in the raw response text.
# NOTE(review): the response looks like JSON; if that is confirmed, parsing
# it with `response.json()` would be sturdier than regex extraction.
_TITLE_RE = re.compile(r'"title":"(.*?)"')    # movie title
_URL_RE = re.compile(r'"url":"(.*?)"')        # movie page link
_COVER_RE = re.compile(r'"cover":"(.*?)"')    # poster image link
_RATE_RE = re.compile(r'"rate":"(.*?)"')      # rating
_IS_NEW_RE = re.compile(r'"is_new":(.*?)}')   # "newly released" flag

# Seconds to wait for the server before giving up instead of hanging forever.
_REQUEST_TIMEOUT = 10

for _ in range(TT):
    response = requests.get(
        f'https://movie.douban.com/j/search_subjects?type=movie&tag=%E7%83%AD%E9%97%A8&sort=recommend&page_limit=20&page_start={F}',
        headers=headers,
        timeout=_REQUEST_TIMEOUT,
    )
    # Fail with a clear HTTPError on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    content = BeautifulSoup(response.content, 'html.parser')
    print(content)
    F = F + 20

    for chunk in content:
        # Only text nodes can be searched with a regex; markup elements
        # (e.g. from an unexpected HTML page) would raise TypeError.
        if not isinstance(chunk, str):
            continue

        # zip() walks the five field lists in lockstep and stops at the
        # shortest one, so a missing field can no longer cause an IndexError.
        rows = list(zip(
            _TITLE_RE.findall(chunk),
            _URL_RE.findall(chunk),
            _COVER_RE.findall(chunk),
            _RATE_RE.findall(chunk),
            _IS_NEW_RE.findall(chunk),
        ))
        for title, link, cover, rating, is_new in rows:
            print('电影名称：', title)
            # The payload escapes slashes as "\/"; undo that for display.
            print('电影链接：', link.replace('\\/', '/'))
            print('图片链接：', cover.replace('\\/', '/'))
            print('电影评分：', rating)
            print('电影新出：', is_new)
            print('\n')  # blank separator between movies

        # Count what was actually printed. The old code read the inner loop
        # index after the loop, which was undefined or stale when a chunk
        # contained no matches.
        R = R + len(rows)

print('总爬取电影数量：', R)